import html.parser
import os
import time
import urllib.request
from multiprocessing import Pool

from bs4 import BeautifulSoup
from selenium import webdriver

def _load_page_source(url, scroll_times):
    """Open *url* in Chrome, scroll to the bottom repeatedly, return the HTML.

    Args:
        url: Page to open.
        scroll_times: How many times to scroll to the bottom of the page,
            pausing one second after each scroll.

    Returns:
        The page source reported by the driver after the last scroll.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        for _ in range(scroll_times):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Pause so the page has time to react before the next scroll.
            time.sleep(1)
        return driver.page_source
    finally:
        # Always shut the browser down, even if loading or scrolling failed.
        driver.quit()


def _extract_image_urls(page_source):
    """Return the ``src`` of the first ``<img>`` inside every ``<noscript>``.

    ``<noscript>`` elements without an ``<img>``, and images without a
    non-empty ``src``, are skipped.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    image_urls = []
    for noscript in soup.find_all('noscript'):
        img = noscript.find('img')
        if img is None:
            continue
        src = img.get('src')
        if src:
            image_urls.append(src)
    return image_urls


def _download_images(image_urls, output_dir):
    """Save each URL as ``<output_dir>/<n>.jpg``, numbering from 0.

    A download that fails is reported and skipped; its number is reused by
    the next successful download so the saved files stay contiguous.
    """
    # urlretrieve does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    count = 0
    for img_url in image_urls:
        print("下载第" + str(count) + "张")
        target = os.path.join(output_dir, str(count) + ".jpg")
        try:
            urllib.request.urlretrieve(img_url, target)
        except (OSError, ValueError) as error:
            # OSError covers URLError/HTTPError and local file errors;
            # ValueError is raised for URLs of an unknown type.
            print(f"第{count}张下载失败，已跳过: {error}")
            continue
        count += 1


# Other question pages that were listed in this script as alternatives to the
# default ``url`` (question titles translated from the original comments):
#   https://www.zhihu.com/question/333026642  How pretty are girls from Henan?
#   https://www.zhihu.com/question/60950349   How can women dress to look gentle?
#   https://www.zhihu.com/question/26297181   What is it like to have a pretty girlfriend?
#   https://www.zhihu.com/question/265911703  Girls who looked good without makeup in high
#                                             school: wouldn't they be stunning with makeup?
#   https://www.zhihu.com/question/285321190  What is it like for a plain-looking guy to
#                                             have a pretty girlfriend?
#   https://www.zhihu.com/question/28560777   What is it like when your girlfriend is very pretty?
#   https://www.zhihu.com/question/285906324  What is it like to have a pretty girlfriend?
#   https://www.zhihu.com/question/55952517   What is it like for a girl to have a very
#                                             pretty girlfriend?
#   https://www.zhihu.com/question/28997505   What is it like to have a pretty girlfriend?
#   https://www.zhihu.com/question/35931586   What does your everyday outfit look like?
#   https://www.zhihu.com/question/61235373   What is it like for a woman to have nice legs
#                                             but a flat chest?
#   https://www.zhihu.com/question/28481779   What is it like to have long legs?
#   https://www.zhihu.com/question/19671417   How do you pose well for photos?
#   https://www.zhihu.com/question/20196263   What troubles and inconveniences do women with
#                                             large breasts face?
#   https://www.zhihu.com/question/46458423   How should short-haired girls pose for photos
#                                             to look sexy?
#   https://www.zhihu.com/question/26037846   What is it like to have a good figure?
def main(url="https://www.zhihu.com/question/51593530", scroll_times=40,
         output_dir="./images"):
    """Download the images found in the ``<noscript>`` tags of a web page.

    The page is opened in Chrome, scrolled to the bottom ``scroll_times``
    times, and then parsed. Each image is saved as ``<n>.jpg`` in
    ``output_dir``. Calling ``main()`` with no arguments uses the same URL,
    scroll count and directory that were previously hard-coded.

    Args:
        url: Page to scrape. Per the original notes, this should already be
            the address reached after clicking "view more" on the question,
            so no click is simulated here.
            NOTE(review): the original comment on the default URL repeated
            the title of question 333026642; its real title is unverified.
        scroll_times: Number of scroll-to-bottom steps before parsing.
        output_dir: Directory for the downloaded files; created if missing.
    """
    page_source = _load_page_source(url, scroll_times)
    image_urls = _extract_image_urls(page_source)
    _download_images(image_urls, output_dir)
    print("下载完成")
if __name__ == "__main__":
    # Run the scraper only when this file is executed directly, not on import.
    main()